{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "# Logistic Regression 3-class Classifier\n",
    "\n",
    "\n",
    "Show below is a logistic-regression classifiers decision boundaries on the\n",
    "`iris <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_ dataset. The\n",
    "datapoints are colored according to their labels.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(__doc__)\n",
    "\n",
    "\n",
    "# Code source: Gaël Varoquaux\n",
    "# Modified for documentation by Jaques Grobler\n",
    "# License: BSD 3 clause\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn import linear_model, datasets\n",
    "\n",
    "# import some data to play with\n",
    "iris = datasets.load_iris()\n",
    "X = iris.data[:, :2]  # we only take the first two features.\n",
    "Y = iris.target\n",
    "\n",
    "h = .02  # step size in the mesh\n",
    "\n",
    "logreg = linear_model.LogisticRegression()\n",
    "\n",
    "# we create an instance of Neighbours Classifier and fit the data.\n",
    "logreg.fit(X, Y)\n",
    "\n",
    "# Plot the decision boundary. For that, we will assign a color to each\n",
    "# point in the mesh [x_min, x_max]x[y_min, y_max].\n",
    "x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n",
    "y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n",
    "xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n",
    "Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])\n",
    "\n",
    "# Put the result into a color plot\n",
    "Z = Z.reshape(xx.shape)\n",
    "plt.figure(1, figsize=(4, 3))\n",
    "plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)\n",
    "\n",
    "# Plot also the training points\n",
    "plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)\n",
    "plt.xlabel('Sepal length')\n",
    "plt.ylabel('Sepal width')\n",
    "\n",
    "plt.xlim(xx.min(), xx.max())\n",
    "plt.ylim(yy.min(), yy.max())\n",
    "plt.xticks(())\n",
    "plt.yticks(())\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Para ver as predições do nosso classificador podemos usar a função predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "logreg.predict(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "logreg.score(X,Y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "1. Utilize o classificador logistico na dataset haberman.txt (o mesmo utilizado na aula do knn) usando todos os atributos. Não esqueça de separar os dados em treino e teste\n",
    "\n",
    "2. Implemente as funções precision e recall para avaliar os resultados"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn import linear_model\n",
    "\n",
    "data = np.loadtxt('haberman.data',delimiter=',')\n",
    "#vamos embaralhar os dados antes de dividir treino e teste\n",
    "rdata = np.random.permutation(data)\n",
    "l,c = rdata.shape\n",
    "X = rdata[:,:c-1]\n",
    "y = rdata[:,c-1]\n",
    "\n",
    "nt = int(l * 0.8)\n",
    "X_train = X[:nt,:]\n",
    "X_test = X[nt:,:]\n",
    "y_train = y[:nt]\n",
    "y_test = y[nt:]\n",
    "\n",
    "logreg = linear_model.LogisticRegression(C=1e5)\n",
    "logreg.fit(X_train,y_train)\n",
    "logreg.score(X_test,y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#Na aula do dia 05-10-16 implementamos uma função, vamos alterá-la para\n",
    "#podermos calcular os valores por classe\n",
    "def confusionMatrix(predicted, actual, class_value):\n",
    "    if len(predicted) != len(actual): return -1\n",
    "    tp = 0.0\n",
    "    fp = 0.0\n",
    "    tn = 0.0\n",
    "    fn = 0.0\n",
    "    for i in range(len(actual)):\n",
    "        if actual[i] == class_value: #labels that are equal to the actual class  (positive examples)\n",
    "            if predicted[i] == class_value:\n",
    "                tp += 1.0 #correctly predicted positive\n",
    "            else:\n",
    "                fn += 1.0 #incorrectly predicted negative\n",
    "        else:              #labels that are different than the actual class (negative examples)\n",
    "            if predicted[i] != class_value:\n",
    "                tn += 1.0 #correctly predicted negative\n",
    "            else:\n",
    "                fp += 1.0 #incorrectly predicted positive\n",
    "    rtn = [tp, fn, fp, tn]\n",
    "    return rtn\n",
    "\n",
    "def precision(tp,fp):\n",
    "    return tp/(tp+fp)\n",
    "\n",
    "def recall (tp,fn):\n",
    "    return tp/(tp+fn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "yb = logreg.predict(X_test)\n",
    "#vamos verificar os valores para as 2 classes\n",
    "tp, fn, fp, tn = confusionMatrix(yb,y_test,1.0) #Classe 1\n",
    "print tp, fn, fp, tn\n",
    "print \"Precision: \" + str(precision(tp,fp))\n",
    "print \"Recall: \" + str(recall(tp,fn))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tp, fn, fp, tn = confusionMatrix(yb,y_test,2.0) #Classe 2\n",
    "print tp, fn, fp, tn\n",
    "print \"Precision: \" + str(precision(tp,fp))\n",
    "print \"Recall: \" + str(recall(tp,fn))\n"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
